**** This program uses CHIP 2002 urban sample to create Tables 2, 4, 9 and 10.
**** The CHIP 2002 data is publicly available from http://www.icpsr.umich.edu/cocoon/ICPSR/STUDY/21741.xml.

* Session setup. -clear all- and -capture log close- were each issued twice in a row; once is enough.
clear all
capture log close
set more off                                                  /* do not pause at --more-- so the do-file runs unattended */
set mem 200m                                                  /* only matters for older Stata releases; newer ones manage memory automatically */
set matsize 800
global chip "E:\CHIPS\2002\urban"                             /* CHIP data folder*/

* Household-level CHIP 2002 urban file: derive expenditure, home ownership and in-kind income,
* keep them with the household id, and store the result in temp.dta.
use $chip\append_h.dta, clear
gen exp = e + d95340                     // household expenditure plus education spending for children studying elsewhere
gen ownhouse = !(b29>0)                  // 1 unless b29 is positive (owns an apartment)
gen inkindinc = c_1 + c_2 + c_3          // in-kind income: sum of the three c_* components

keep pcode exp ownhouse inkindinc
sort pcode
save temp, replace

* Person-level appendix file: household size and total cash income, one row per household.
use $chip\append_p.dta, clear                                 // CHIP 2002 Urban individual-level appendix data
sort pcode
by pcode: egen hsize=count(a1) if a1~=99                      /* household size, not counting a1==99; missing on the a1==99 rows themselves */
by pcode: egen cashincome=sum(a15)                            /* total household cash income */
* hsize is missing on rows with a1==99, so (first) returned a missing household size
* whenever such a row happened to be first within pcode. (firstnm) takes the first
* non-missing value instead; cashincome is defined on every row, so (first) is kept for it.
collapse (firstnm) hsize (first) cashincome, by(pcode)

merge pcode using temp                                        /* add exp, ownhouse and inkindinc saved from the household file */
drop _merge

* Household income and the two saving-rate measures, stored back into temp.dta.
gen income = cashincome + inkindinc      // total household income: cash plus in-kind
gen saving = ln(income/exp)              // saving rate measured as ln(Y/C)
gen savingr = (income-exp)/income        // saving rate measured as (Y-C)/Y

sort pcode
save temp, replace
 
* Build household-level characteristics from the individual-level file and reduce to one row per pcode.
* Statement order matters: hpop, the head variables and the health variables are computed on all rows,
* the child variables only on rows that survive -drop if p102==.-.
use $chip\uind.dta, clear                                         // CHIP 2002 urban individual-level data
sort pcode

by pcode: egen hpop=count(p102)                                    /* number of rows with non-missing p102 in the household (used as number of family members) */
        
	****** Household head information ******
	* Each egen takes the within-household max of value*(condition): rows failing the condition contribute 0,
	* so the result is the value on the qualifying row, or 0 when no row qualifies.
by pcode: egen hage=max(p106*(p103==1))                           /* household head age (p103==1 marks the head) */
by pcode: egen mage=max(p106*(p105==2 &(p103==1 | p103==2)))      // mother's age: age of a head or spouse with p105==2; NOTE(review): equals 0 when no such row exists, so tests like mage<40 are true for those households - confirm intended
by pcode: egen hsex=max(p105*(p103==1))                           /* household head sex */
by pcode: egen hedu=max(p113*(p103==1))                           /* household head years of schooling */
by pcode: egen hmin=max(p108*(p103==1 | p103==2))                 /* minority status: largest p108 among head and spouse */  

          /* household health information */
by pcode: egen hdisable=max(p107==4)                              /* 1 if any member has p107==4 (unable to work because of disability) */
by pcode: egen badhealth=max(p118==5 | p120==1)                   /* 1 if any member has p118==5 or p120==1 (bad health) */


    ****** Children information ******
    * Rows with missing p102 are removed before the child variables are built.
drop if p102==.
by pcode: egen age5=max((p106>4 & p106<10) )                       /* 1 if any member is aged 5-9 */
by pcode: egen age15=max((p106>14 & p106<20) & p109==1)            /* 1 if any member is aged 15-19 and has p109==1 */
by pcode: egen age10=max((p106>9 & p106<15) )                      /* 1 if any member is aged 10-14 */
by pcode: egen sona=sum(p105==1 & p103==3 & p109==1 )              /* number of unmarried sons (p103==3, p105==1, p109==1) */
by pcode: egen daughtera=sum(p105==2 & p103==3 & p109==1)          /* number of unmarried daughters (p103==3, p105==2, p109==1) */ 
by pcode: egen son=sum(p105==1 & p103==3 & p106<20)                /* number of sons younger than 20 */
by pcode: egen daughter=sum(p105==2 & p103==3 & p106<20)           /* number of daughters younger than 20 */ 

by pcode: egen tson=sum(p105==1 & p103==3)                         // total number of sons
by pcode: egen tdaughter=sum(p105==2 & p103==3)                    // total number of daughters

by pcode: egen singleson=max(((p106>25 & p106<35) & p105==1 & p109==1))    /* 1 if any male member aged 26-34 is unmarried (p109==1) */
by pcode: egen singlegirl=max(((p106>25 & p106<35) & p105==2 & p109==1))   /* 1 if any female member aged 26-34 is unmarried (p109==1) */

* One row per household; age* expands to age5, age15 and age10. The egen results are constant within pcode,
* and for city the value on the first row of each household is kept.
collapse (first) city hage mage hsex hedu hmin hpop age* badhealth hdisable singleson singlegirl son daughter tson tdaughter sona daughtera, by(pcode)
* Attach the household income/saving variables from temp.dta, then map the CHIP city
* code onto the 1990 census code and store one row per household, sorted by code.
gen child = son + daughter                                     /* number of sons and daughters younger than 20 */
rename city code
merge pcode using temp
drop _merge

* CHIP code -> 1990 census code
replace code = 110100 if code < 120000                         /* Beijing: city-level code, no district-level data in 1990 */
replace code = 341281 if code == 341600                        /* Haozhou */
replace code = 420521 if code == 420500                        /* Jingzhou, Jiangling */
replace code = 421202 if code == 421200                        /* Xian'An */
replace code = 500000 if code == 500100                        /* Chongqing */
replace code = 530113 if code == 530200                        /* Dongchuan */
replace code = 533001 if code == 530500                        /* Baoshan */

sort code
duplicates drop pcode, force
save temp, replace


* 1990 census sex ratio (cohort aged 0-9 in 1990), reduced to one row per code and
* joined to the household sample in temp.dta.
use D:\dq\ShangJin\Writings\saving\jpe\Final\sexratio_1990.dta
duplicates drop code, force
sort code
merge code using temp
drop _merge
sort pcode

* Discard rows without a usable code and rows without household information.
drop if code==.
drop if code== 9999999
drop if hage==.

* Two codes have no matching name in the 1990 census; their sex ratio is filled in
* from the cohort aged 10-19 in the 2000 census.
replace sr = 1.100125462 if code==140800                       /* Yunchen */
replace sr = 1.06589263 if code==421000                        /* Jingzhou */

sort code
save temp, replace


* The sex ratio and the CHIP 2002 urban sample are already joined in memory, and temp.dta is an
* exact copy of that data. The former -merge code using temp- at this point therefore only matched
* the data with itself (no rows or values changed) and left a stray _merge variable; it is removed.
drop if hage==. | income==.
gen pinc=income/hpop                                            /* per capita income */
gen pexp = exp/hpop                                             /* per capita expenditure */
gen loginc=log(pinc)                                            /* per capita income (log) */
gen loginc2=loginc*loginc                                       /* squared log per capita income */
gen poorhealth=(badhealth==1 | hdisable==1)                     /* 1 if the household has a member in bad health or disabled */
* NOTE(review): inequal() is not an official egen function; presumably it comes from a
* user-written package that must be installed - confirm.
egen gini = inequal(pinc), by(code) weight(hpop) index(gini)    /* Gini of per capita income within each code */

******** Prepare the subsamples for regression analyses ********
   ***** Three-person nuclear families (child counted via son/daughter, i.e. younger than 20)
gen oneson=(son==1  & child==1 & hpop==3)                        // Three-person nuclear family with a son
gen onegirl=(daughter==1 & child==1 & hpop==3)                   // Three-person nuclear family with a daughter
gen duplicate=(oneson & onegirl)                                 // Households flagged as both; removed from both groups below
replace oneson=0 if duplicate==1
replace onegirl=0 if duplicate==1
gen onechild = oneson+onegirl                                    // Three-person nuclear family
foreach cut in 40 45 {
    gen oneson`cut'=(oneson & mage<`cut')                        // ... with a son, mother's age below the cut-off
    gen onegirl`cut'=(onegirl & mage<`cut')                      // ... with a daughter, mother's age below the cut-off
    gen onechild`cut'=oneson`cut'+onegirl`cut'                   // ... with a child, mother's age below the cut-off
}

   ***** Three-person nuclear families, alternative child definition based on sona/daughtera
   * NOTE(review): these were labelled "no explicit marriage status", yet sona/daughtera are
   * built with p109==1 - confirm which description is right.
gen onesona=(sona==1 & onechild==1 & hpop==3)                    // Alternative definition: family with a son
gen onegirla=(daughtera==1  & onechild==1 & hpop==3)             // Alternative definition: family with a daughter
* Fix: this was oneson+onegirl, which merely duplicated onechild instead of using the
* alternative-definition indicators.
gen onechilda = onesona+onegirla                                 // Alternative definition: family with a child
foreach cut in 40 45 {
    gen oneson`cut'a=(onesona & mage<`cut')                      // ... with a son, mother's age below the cut-off
    gen onegirl`cut'a=(onegirla & mage<`cut')                    // ... with a daughter, mother's age below the cut-off
    gen onechild`cut'a=oneson`cut'a+onegirl`cut'a                // ... with a child, mother's age below the cut-off
}
 
                   
   ****** Extended families with one child (household size unrestricted) ******
gen son1 = (son==1 & child==1)                                   // only child is a son
gen girl1 = (daughter==1 & child==1)                             // only child is a daughter
gen child1 = son1 + girl1                                        // exactly one child
foreach cut in 40 45 {
    gen oneson1_`cut' = (son1 & mage<`cut')                      // only son, mother's age below the cut-off
    gen onegirl1_`cut' = (girl1 & mage<`cut')                    // only daughter, mother's age below the cut-off
    gen onechild1_`cut' = oneson1_`cut' + onegirl1_`cut'         // only child, mother's age below the cut-off
}

   ****** Same subsamples with the alternative child definition (sona/daughtera) ******
gen son1a = (sona==1 & child==1)                                 // only child is a son
gen girl1a = (daughtera==1 & child==1)                           // only child is a daughter
gen child1a = son1a + girl1a                                     // exactly one child
foreach cut in 40 45 {
    gen oneson1_`cut'a = (son1a & mage<`cut')                    // only son, mother's age below the cut-off
    gen onegirl1_`cut'a = (girl1a & mage<`cut')                  // only daughter, mother's age below the cut-off
    gen onechild1_`cut'a = oneson1_`cut'a + onegirl1_`cut'a      // only child, mother's age below the cut-off
}


  ******* Trimming flags for the whole sample, based on saving *******
  * outlier<p> holds the p-th percentile of saving (95th, 5th, 99th and 1st).
foreach pc in 95 5 99 1 {
    egen outlier`pc' = pctile(saving), p(`pc')
}
gen range1 = (saving>outlier5 & saving<outlier95)                // strictly between the 5th and 95th percentile
gen range2 = (saving>outlier1 & saving<outlier99)                // strictly between the 1st and 99th percentile
gen poor = (pinc<3000 | pexp<3000)                               // per capita income or expenditure below 3000 RMB


 ****** Trimming flags within each subsample, based on savingr
 * For every subsample indicator, range1_<name> marks rows strictly between that subsample's 5th and 95th
 * percentile of savingr and range2_<name> rows strictly between its 1st and 99th percentile. The cut-offs
 * are missing outside the subsample, so both flags are 0 there. The cut-off variables are dropped again.
foreach grp of varlist oneson onegirl onechild son1 girl1 child1 oneson40 onegirl40 oneson40a onegirl40a ///
oneson45 onegirl45 oneson45a onegirl45a onechild40 onechild45 onechild40a onechild45a oneson1_40 onegirl1_40 onechild1_40 ///
oneson1_40a onegirl1_40a onechild1_40a oneson1_45 onegirl1_45 onechild1_45 oneson1_45a onegirl1_45a onechild1_45a {
    foreach pc in 95 5 99 1 {
        egen outlier`pc'_`grp' = pctile(savingr) if `grp', p(`pc')
    }
    gen range1_`grp' = (savingr>outlier5_`grp' & savingr<outlier95_`grp')
    gen range2_`grp' = (savingr>outlier1_`grp' & savingr<outlier99_`grp')
    drop outlier95_`grp' outlier5_`grp' outlier99_`grp' outlier1_`grp'
}

   ***** Outcome for the multinomial logit: unmarried adult child in the household
   * 0 = none, 1 = unmarried adult son, 2 = unmarried adult daughter, 3 = both
gen single = 0
replace single = 1 if singleson==1
replace single = 2 if singlegirl==1
replace single = 3 if singleson & singlegirl


 *********** Table 2: Material Wealth and Marital Status: Which Families Are More Likely to Have an Unmarried Adult Child? (Urban part)
 * All three models use households whose head is aged 50-60.
global xsingle ownhouse loginc hsize hage hsex hedu hmin poorhealth
local headage "hage>49 & hage<61"
eststo a1: xi: quietly logit singleson $xsingle if `headage' & singlegirl==0, r      // logit: unmarried adult son, households without an unmarried adult daughter
eststo a2: xi: quietly logit singlegirl $xsingle if `headage' & singleson==0, r      // logit: unmarried adult daughter, households without an unmarried adult son
eststo a3: xi: quietly mlogit single $xsingle if `headage' & single<3 , r            // multinomial logit: son or daughter, households with both excluded
estout a1 a2 a3, cells(b(star fmt(%9.2f)) se(par)) drop(_cons) ///
        starlevels(* .10 ** .05) ///
        stats(r2_a aic  N, fmt(%9.2f %9.1f %9.0g) ) replace


************ Table 4: Summary Statistics on Household Savings in 2002 (Urban Part)
* The second line used to repeat the son sample, so the daughter families were never summarised.
tabstat saving [aw=hpop] if oneson & mage<40, stat(mean median max min sd n)     // three-person families with a son, mother's age < 40
tabstat saving [aw=hpop] if onegirl & mage<40, stat(mean median max min sd n)    // three-person families with a daughter, mother's age < 40
tabstat saving [aw=hpop], stat(mean median max min sd n)                         // all households

************ Table 9: Urban Household-level Savings for Three-person Households in 2002 ************
global xlist sr loginc loginc2 age5 age10 age15 hage hsex hedu hmin poorhealth ownhouse gini
* Columns ra1-ra8 (mother's age < 40 throughout), son then daughter in each pair:
*   ra1/ra2  full sample
*   ra3/ra4  households with per capita income or expenditure below 3000 RMB removed
*   ra5/ra6  top and bottom 5% of savers removed
*   ra7/ra8  top and bottom 5% of savers removed, alternative child definition
local k = 0
foreach smp in "oneson40" "onegirl40" "oneson40 & poor==0" "onegirl40 & poor==0" ///
               "range1_oneson40" "range1_onegirl40" "range1_oneson40a" "range1_onegirl40a" {
    local ++k
    eststo ra`k': quietly reg saving $xlist if `smp', r
}
estout ra*, cells(b(star fmt(%9.2f)) se(par)) drop(_cons) ///
        starlevels(* .10 ** .05) ///
        stats(r2_a aic  N, fmt(%9.2f %9.1f %9.0g) ) replace
   
   
**************** Table 10: Robustness Checks on Urban Household-Level Regressions for Three-person Households in 2002 *******
       // OLS regressions for the sample with mother's age < 45
eststo rb1: xi: quietly reg saving $xlist if oneson45, r                       // One son: full sample
eststo rb2: xi: quietly reg saving $xlist if onegirl45, r	                   // One daughter: full sample
eststo rb3: xi: quietly reg saving $xlist if oneson45 & poor==0, r             // One son: remove households with income or expenditure below 3000 RMB
eststo rb4: xi: quietly reg saving $xlist if onegirl45 & poor==0, r            // One daughter: remove households with income or expenditure below 3000 RMB
eststo rb5: xi: quietly reg saving $xlist if oneson45 & range2_oneson45, r     // One son: remove the top and bottom 1% savers
eststo rb6: xi: quietly reg saving $xlist if onegirl45 &  range2_onegirl45, r  // One daughter: remove the top and bottom 1% savers
eststo rb7: xi: quietly reg saving $xlist if range1_oneson45, r                // One son: remove the top and bottom 5% savers
eststo rb8: xi: quietly reg saving $xlist if range1_onegirl45, r               // One daughter: remove the top and bottom 5% savers
eststo rb9: xi: quietly reg saving $xlist if range1_oneson45a, r               // One son: remove the top and bottom 5% savers, alternative child definition
* Added: the daughter counterpart of rb9 was missing here, while the median regressions and both
* extended-family panels all report ten columns.
eststo rb10: xi: quietly reg saving $xlist if range1_onegirl45a, r             // One daughter: remove the top and bottom 5% savers, alternative child definition
estout rb* using robust.xls, keep(sr) cells(b(star fmt(%9.2f) ) se(par ))   starlevels(* .10 ** .05 *** .01)  replace
		
	   // median regressions for the sample with mother's age < 45
* Columns ma1-ma10, son then daughter in each pair:
*   ma1/ma2   full sample
*   ma3/ma4   households with income or expenditure below 3000 RMB removed
*   ma5/ma6   top and bottom 1% of savers removed
*   ma7/ma8   top and bottom 5% of savers removed
*   ma9/ma10  top and bottom 5% of savers removed, alternative child definition
local j = 0
foreach smp in "oneson45" "onegirl45" "oneson45 & poor==0" "onegirl45 & poor==0" ///
               "oneson45 & range2_oneson45" "onegirl45 & range2_onegirl45" ///
               "range1_oneson45" "range1_onegirl45" "range1_oneson45a" "range1_onegirl45a" {
    local ++j
    eststo ma`j': xi: quietly qreg saving $xlist if `smp'
}
estout ma* using robust.xls, keep(sr) cells(b(star fmt(%9.2f) ) se(par )) ///
   starlevels(* .10 ** .05 *** .01) ///
   stats(N, fmt(%9.0g)) append
   
   
************ For extended families with one child **************************
       // OLS regressions of saving, i.e. ln(Y/C), with mother's age < 45
* Columns rb1-rb10 (overwriting the stored three-person results), son then daughter in each pair:
*   rb1/rb2   full sample
*   rb3/rb4   households with income or expenditure below 3000 RMB removed
*   rb5/rb6   top and bottom 1% of savers removed
*   rb7/rb8   top and bottom 5% of savers removed
*   rb9/rb10  top and bottom 5% of savers removed, alternative child definition
local k = 0
foreach smp in "oneson1_45" "onegirl1_45" "oneson1_45 & poor==0" "onegirl1_45 & poor==0" ///
               "range2_oneson1_45" "range2_onegirl1_45" "range1_oneson1_45" "range1_onegirl1_45" ///
               "range1_oneson1_45a" "range1_onegirl1_45a" {
    local ++k
    eststo rb`k': xi: quietly reg saving $xlist if `smp', r
}
estout rb* using robust.xls, keep(sr) cells(b(star fmt(%9.2f) ) se(par )) ///
   starlevels(* .10 ** .05 *** .01)  append
		
	   // median regressions of saving, i.e. ln(Y/C), with mother's age < 45
* NOTE(review): if the (Y-C)/Y measure was intended here, the dependent variable would be savingr - confirm.
* Columns ma1-ma10 (overwriting the stored three-person results), son then daughter in each pair:
*   ma1/ma2   full sample
*   ma3/ma4   households with income or expenditure below 3000 RMB removed
*   ma5/ma6   top and bottom 1% of savers removed
*   ma7/ma8   top and bottom 5% of savers removed
*   ma9/ma10  top and bottom 5% of savers removed, alternative child definition
local j = 0
foreach smp in "oneson1_45" "onegirl1_45" "oneson1_45 & poor==0" "onegirl1_45 & poor==0" ///
               "range2_oneson1_45" "range2_onegirl1_45" "range1_oneson1_45" "range1_onegirl1_45" ///
               "range1_oneson1_45a" "range1_onegirl1_45a" {
    local ++j
    eststo ma`j': xi: quietly qreg saving $xlist if `smp'
}
estout ma* using robust.xls, keep(sr) cells(b(star fmt(%9.2f) ) se(par )) ///
   starlevels(* .10 ** .05 *** .01) ///
   stats(N, fmt(%9.0g)) append
		         
